Introduction

This assignment aims to analyze the policing dataset by using advanced graphics and interactive plots. I have received two datasets - ‘crime23.csv’ and ‘temp2023.csv’.

The ‘crime23’ dataset contains information about crimes that occurred in Colchester in 2023. The dataset provides details such as the type and location of the crime, the date of the crime in the format of year and month (Y-m), and the status of the offence outcome.

The ‘temp2023’ dataset contains daily climate data collected from a weather station near Colchester. This dataset provides information on various measures such as temperature, wind speed, pressure, and visibility.

# Load the two input files from the working directory: the crime records and
# the daily weather observations.
crime23 <- read.csv(file = "crime23.csv")
temp2023 <- read.csv(file = "temp2023.csv")

Data Exploration

The frequency table below displays the number of offences recorded for each type of crime in Colchester in 2023, including shoplifting, anti-social behaviour, public order, and others. The data is presented with one row per crime category, sorted from most to least frequent, to make it easier to understand. The table shows that there were 14 different categories of crime, with violent crime being the most frequent at 2633 cases. The second most frequent crime was anti-social behaviour, with 677 cases. The least frequent crime was possession of weapons, with only 74 cases.

# Columns are read through with() instead of attach(): attach() leaves the
# data frame on the search path, where its columns (e.g. `date`) can be
# silently masked by objects from packages loaded afterwards.

# Category-by-month contingency table.
ttable <- with(crime23, table(category, date))

# Number of offences per crime category, most frequent first. Despite the
# variable name this is a one-way frequency table.
two_way_table <- sort(with(crime23, table(category)), decreasing = TRUE)

knitr::kable(
  two_way_table,
  caption = "Colchester Crime Frequency in 2023",
  col.names = c("Crime", "Frequency")
)
Colchester Crime Frequency in 2023
Crime Frequency
violent-crime 2633
anti-social-behaviour 677
criminal-damage-arson 581
shoplifting 554
public-order 532
other-theft 491
vehicle-crime 406
bicycle-theft 235
burglary 225
drugs 208
robbery 94
other-crime 92
theft-from-the-person 76
possession-of-weapons 74

The following barplot depicts the number of crimes reported each month in the year 2023, using a scale fill gradient to represent the frequency of crimes reported. Darker blue indicates a higher number of reported cases, whereas lighter blue represents a lower number. The plot reveals that January and September had the highest reported offences for the year 2023.

# Tabulate the number of recorded offences for each `date` value and turn the
# table into a two-column data frame suitable for plotting.
CrimeCounts <- table(crime23$date)
CrimeOffenceCounts <- setNames(
  as.data.frame(CrimeCounts),
  c("Month", "CrimeCounts")
)

# Bar chart of offences per month; the fill gradient follows the bar height.
# Despite its name, this plot covers every offence category, because
# `crime23` is not filtered before counting.
violent_crime_per_month <- ggplot(
  data = CrimeOffenceCounts,
  mapping = aes(x = Month, y = CrimeCounts, fill = CrimeCounts)
) +
  geom_col(width = 0.7) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Number of Reported Offences per Month in 2023",
    x = "Month",
    y = "Number of Offences"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

violent_crime_per_month

# Count offences per street and keep ten streets from the ranked list.
frequency_by_street <- crime23 %>%
  group_by(street_name) %>%
  summarise(total_offences = n()) %>%
  arrange(desc(total_offences)) %>%
  # NOTE(review): ranks 2-11 are kept, so the single most frequent street is
  # excluded even though the plot title says "Top 10" -- confirm this is
  # intentional (e.g. the first entry being a catch-all location).
  slice(2:11)

# Horizontal bar chart of the selected streets. A character x variable is
# drawn in alphabetical order, which discards the arrange() above, so the
# axis limits are set explicitly: after coord_flip() the first limit sits at
# the bottom, hence rev() to put the most frequent street at the top.
Offences_by_street <- ggplot(
  data = frequency_by_street,
  aes(x = street_name, y = total_offences)
) +
  geom_col(fill = "skyblue") +
  scale_x_discrete(limits = rev(frequency_by_street$street_name)) +
  labs(
    title = "Top 10 Frequency of Offenses by Street Name",
    x = "Street Name",
    y = "Total Offenses"
  ) +
  theme(axis.text.x = element_text(vjust = 0.1, hjust = 1)) +
  coord_flip()

# Render the static plot as an interactive plotly widget.
ggplotly(Offences_by_street)

Pie chart

The pie chart below displays the distribution of outcomes for all crimes that have a recorded outcome status. As per the chart, 42.83% of investigations were completed with no suspect identified.

# Count offences per outcome status, ignoring records with no outcome.
CrimeOutcome <- crime23 %>%
  filter(!is.na(outcome_status)) %>%
  group_by(outcome_status) %>%
  summarise(count = n())

# Share of each outcome among all records that have an outcome, in percent.
TotalCount <- sum(CrimeOutcome$count)

OutcomeDistribution <- CrimeOutcome %>%
  mutate(Percentage = round((count / TotalCount) * 100, digits = 2))

# Interactive pie chart of the outcome shares. The data covers every crime
# category (no category filter is applied above), so the title says
# "All Crimes" rather than "Violent-Crimes".
# NOTE(review): `essex_palette` is not defined in the visible part of this
# document -- presumably set in an earlier chunk; confirm.
plot_ly(
  OutcomeDistribution,
  labels = ~outcome_status,
  values = ~Percentage,
  type = "pie",
  marker = list(colors = essex_palette)
) %>%
  layout(
    title = "Percentage Distribution of Outcomes For All Crimes",
    showlegend = TRUE
  )

Density plot

The density plot below shows the distribution of the monthly offence counts for all reported offences.

# Number of recorded offences for each `date` value (one row per month).
crime23_year <- summarise(group_by(crime23, date), count = n())

# Kernel density estimate of those monthly counts.
ggplot(data = crime23_year, mapping = aes(x = count)) +
  geom_density(fill = "#AB274F", colour = "black", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Density Plot of All Reported Incidents",
    x = "Offence Counts",
    y = "Density"
  )

Seasonal Boxplot

# Split the records into four seasons by their year-month value. Winter here
# combines December 2023 with January and February 2023.
crime23_spring <- crime23 %>% filter(date %in% c("2023-03", "2023-04", "2023-05"))
crime23_summer <- crime23 %>% filter(date %in% c("2023-06", "2023-07", "2023-08"))
crime23_autumn <- crime23 %>% filter(date %in% c("2023-09", "2023-10", "2023-11"))
crime23_winter <- crime23 %>% filter(date %in% c("2023-12", "2023-01", "2023-02"))

# Stack the four subsets, tagging every record with its season.
combine_seasons <- rbind(
  mutate(crime23_spring, Season = "Spring"),
  mutate(crime23_summer, Season = "Summer"),
  mutate(crime23_autumn, Season = "Autumn"),
  mutate(crime23_winter, Season = "Winter")
)

# Number of offences per season and crime category. Despite its name, `avg`
# holds a count (n()), not an average. `.groups = "drop"` returns an
# ungrouped result and avoids the "grouped output by 'Season'" message that
# summarise() prints when the grouping of the result is left implicit.
season_crime_freq <- combine_seasons %>%
  group_by(Season, category) %>%
  summarise(avg = n(), .groups = "drop")
## `summarise()` has grouped output by 'Season'. You can override using the
## `.groups` argument.

This boxplot displays the distribution of offence counts across crime categories for each season. Each season is represented by a different colored box.

# One box per season; each observation behind a box is the `avg` value of one
# row of `season_crime_freq`.
Seasonal_boxplot <- ggplot(
  data = season_crime_freq,
  mapping = aes(x = Season, y = avg, fill = Season)
) +
  labs(
    title = "Crime rates among different seasons",
    x = "Seasons",
    y = "Offence rate"
  ) +
  geom_boxplot()

# Print the plot with the custom fill colours; the stored object keeps the
# default fill scale.
# NOTE(review): `essex_palette` is not defined in the visible part of this
# document -- presumably set in an earlier chunk; confirm.
Seasonal_boxplot + scale_fill_manual(values = essex_palette)

Correlation analysis

# Derive a year-month key from the daily observation date so the weather data
# can be joined to the monthly crime counts.
temp2023$Date <- as.Date(temp2023$Date, format = "%Y-%m-%d")
temp2023$month <- format(temp2023$Date, "%Y-%m")

# Mean of the daily average temperature per month. na.rm = TRUE keeps a
# single missing daily reading from turning the whole month into NA.
avgtemp_per_month <- temp2023 %>%
  group_by(month) %>%
  summarise(avg = mean(TemperatureCAvg, na.rm = TRUE))
names(avgtemp_per_month) <- c("date", "avg_temp")

# One row per month with both the offence count and the mean temperature.
temp_crime <- merge(avgtemp_per_month, crime23_year, by = "date")

# Pearson correlation between monthly offence count and mean temperature,
# computed over the months where both values are available.
correlation <- cor(
  temp_crime$count,
  temp_crime$avg_temp,
  use = "complete.obs"
)

# Scatter plot with a fitted straight line. sprintf() builds the title
# without the stray spaces that paste() inserts around the coefficient.
correlplot <- ggplot(temp_crime, aes(x = count, y = avg_temp)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  labs(
    title = sprintf(
      "Correlation Plot (Correlation Coefficient: %.2f)",
      correlation
    ),
    x = "Count of Crimes",
    y = "Average Temperature"
  )
correlplot
## `geom_smooth()` using formula = 'y ~ x'

The correlation coefficient of 0.24 indicates a weak positive linear relationship between the average monthly temperature and the number of crimes.

Map of Crimes Committed In Colchester Summer 2023

library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.3
# Records for June 2023, with coordinates coerced to numeric for plotting.
# No group_by() is applied: without a following summarise() it would only
# leave `map` as a grouped data frame and change nothing that is drawn.
crime23_june <- crime23 %>% filter(date %in% c("2023-06"))
map <- crime23_june %>%
  mutate(long = as.numeric(long), lat = as.numeric(lat))

# Base layer: every June offence. Overlay (red): violent crimes taken from
# `crime23_summer`, with the same numeric coercion of the coordinates.
# filter() is used for the overlay so rows with a missing category are
# dropped instead of becoming all-NA rows.
# NOTE(review): the base layer covers June only while the overlay uses
# `crime23_summer` -- confirm whether both layers should share one period.
# lng/lat are mapped explicitly so leaflet does not have to guess the
# coordinate columns.
m <- leaflet(map) %>%
  addTiles() %>%
  addCircleMarkers(lng = ~long, lat = ~lat, popup = ~category) %>%
  addCircleMarkers(
    data = crime23_summer %>%
      filter(category == "violent-crime") %>%
      mutate(long = as.numeric(long), lat = as.numeric(lat)),
    lng = ~long,
    lat = ~lat,
    group = "violent-crime",
    color = "red",
    popup = ~category
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
## Assuming "long" and "lat" are longitude and latitude, respectively
m

Time Series

library(lubridate)
library(xts)
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following object is masked from 'package:leaflet':
## 
##     addLegend
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(dplyr)
library(lubridate)
library(ggplot2)
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo